{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "c261e5f4-17a8-40da-beb9-599f1717e0fe",
   "metadata": {},
   "source": [
    "### 1. 安装HuggingFace 并下载模型到本地"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "02785614-9268-41c8-85a5-d579490edbbf",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: sagemaker in /opt/conda/lib/python3.7/site-packages (2.183.0)\n",
      "Requirement already satisfied: attrs<24,>=23.1.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (23.1.0)\n",
      "Requirement already satisfied: boto3<2.0,>=1.26.131 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (1.28.41)\n",
      "Requirement already satisfied: cloudpickle==2.2.1 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (2.2.1)\n",
      "Requirement already satisfied: google-pasta in /opt/conda/lib/python3.7/site-packages (from sagemaker) (0.2.0)\n",
      "Requirement already satisfied: numpy<2.0,>=1.9.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (1.21.6)\n",
      "Requirement already satisfied: protobuf<5.0,>=3.12 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (3.20.3)\n",
      "Requirement already satisfied: smdebug-rulesconfig==1.0.1 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (1.0.1)\n",
      "Requirement already satisfied: importlib-metadata<7.0,>=1.4.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (4.13.0)\n",
      "Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (23.1)\n",
      "Requirement already satisfied: pandas in /opt/conda/lib/python3.7/site-packages (from sagemaker) (1.3.5)\n",
      "Requirement already satisfied: pathos in /opt/conda/lib/python3.7/site-packages (from sagemaker) (0.3.0)\n",
      "Requirement already satisfied: schema in /opt/conda/lib/python3.7/site-packages (from sagemaker) (0.7.5)\n",
      "Requirement already satisfied: PyYAML~=6.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (6.0)\n",
      "Requirement already satisfied: jsonschema in /opt/conda/lib/python3.7/site-packages (from sagemaker) (3.2.0)\n",
      "Requirement already satisfied: platformdirs in /opt/conda/lib/python3.7/site-packages (from sagemaker) (3.5.3)\n",
      "Requirement already satisfied: tblib==1.7.0 in /opt/conda/lib/python3.7/site-packages (from sagemaker) (1.7.0)\n",
      "Requirement already satisfied: botocore<1.32.0,>=1.31.41 in /opt/conda/lib/python3.7/site-packages (from boto3<2.0,>=1.26.131->sagemaker) (1.31.41)\n",
      "Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in /opt/conda/lib/python3.7/site-packages (from boto3<2.0,>=1.26.131->sagemaker) (1.0.1)\n",
      "Requirement already satisfied: s3transfer<0.7.0,>=0.6.0 in /opt/conda/lib/python3.7/site-packages (from boto3<2.0,>=1.26.131->sagemaker) (0.6.1)\n",
      "Requirement already satisfied: zipp>=0.5 in /opt/conda/lib/python3.7/site-packages (from importlib-metadata<7.0,>=1.4.0->sagemaker) (2.2.0)\n",
      "Requirement already satisfied: typing-extensions>=3.6.4 in /opt/conda/lib/python3.7/site-packages (from importlib-metadata<7.0,>=1.4.0->sagemaker) (4.6.3)\n",
      "Requirement already satisfied: six in /opt/conda/lib/python3.7/site-packages (from google-pasta->sagemaker) (1.16.0)\n",
      "Requirement already satisfied: pyrsistent>=0.14.0 in /opt/conda/lib/python3.7/site-packages (from jsonschema->sagemaker) (0.15.7)\n",
      "Requirement already satisfied: setuptools in /opt/conda/lib/python3.7/site-packages (from jsonschema->sagemaker) (65.5.1)\n",
      "Requirement already satisfied: python-dateutil>=2.7.3 in /opt/conda/lib/python3.7/site-packages (from pandas->sagemaker) (2.8.2)\n",
      "Requirement already satisfied: pytz>=2017.3 in /opt/conda/lib/python3.7/site-packages (from pandas->sagemaker) (2019.3)\n",
      "Requirement already satisfied: ppft>=1.7.6.6 in /opt/conda/lib/python3.7/site-packages (from pathos->sagemaker) (1.7.6.6)\n",
      "Requirement already satisfied: dill>=0.3.6 in /opt/conda/lib/python3.7/site-packages (from pathos->sagemaker) (0.3.6)\n",
      "Requirement already satisfied: pox>=0.3.2 in /opt/conda/lib/python3.7/site-packages (from pathos->sagemaker) (0.3.2)\n",
      "Requirement already satisfied: multiprocess>=0.70.14 in /opt/conda/lib/python3.7/site-packages (from pathos->sagemaker) (0.70.14)\n",
      "Requirement already satisfied: contextlib2>=0.5.5 in /opt/conda/lib/python3.7/site-packages (from schema->sagemaker) (0.6.0.post1)\n",
      "Requirement already satisfied: urllib3<1.27,>=1.25.4 in /opt/conda/lib/python3.7/site-packages (from botocore<1.32.0,>=1.31.41->boto3<2.0,>=1.26.131->sagemaker) (1.26.16)\n",
      "\u001b[33mDEPRECATION: pyodbc 4.0.0-unsupported has a non-standard version number. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pyodbc or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n",
      "\u001b[0m\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
      "\u001b[0m"
     ]
    }
   ],
   "source": [
    "!pip install huggingface-hub -Uqq \n",
    "!pip install -U sagemaker"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "0ba24701-47db-4107-9a6c-1667038d0054",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!rm -rf ./LLM_qwen_int4_model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "9e6bd7ee-16a3-4f5a-8857-8bbba83eb9e7",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from huggingface_hub import snapshot_download\n",
    "from pathlib import Path\n",
    "local_model_path = Path(\"./LLM_qwen_int4_model\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "c3469632-4174-4df4-a7d1-ef167561c626",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "local_model_path.mkdir(exist_ok=True)\n",
    "model_name = \"Qwen/Qwen-7B-Chat-Int4\"\n",
    "commit_hash = \"955bbfa8c8f0c592b7713f1186c2c9ab477ef862\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "94e8abc5-a58e-40e2-b1e6-fbf48307c716",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3f359f1177a0424fbd51a62f405d931e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Fetching 32 files', max=32.0, style=ProgressStyle(descrip…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b2b5e1cbb2644b3d96d2dd60d0528e74",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)c9ab477ef862/LICENSE', max=6902.0, style=P…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fe8394a822a1400db2fdb395fbb4b16d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)ef862/.gitattributes', max=1603.0, style=P…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "dead0ac536974dde8c582f3a76f41d3c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)2c9ab477ef862/NOTICE', max=2703.0, style=P…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c09f5409d52244f89a53a050991440ab",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)ab477ef862/README.md', max=22265.0, style=…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3b404138cdaa4340ad6904a13ef0ed38",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading hfagent_chat_2.png', max=1927640.0, style=Pro…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9ab6cfe42499467c9743117ad60a80d4",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading cli_demo.gif', max=1981045.0, style=ProgressS…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b51333ea505741038b6f42aeff9848af",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)f862/assets/logo.jpg', max=109711.0, style…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7fb2a51141014f5c88faae6e3a752cb6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading hfagent_chat_1.png', max=1708738.0, style=Pro…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6d68a8c70b1f42ac80b4329862a5f7f6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading hfagent_run.png', max=2770957.0, style=Progre…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "\n",
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "496601f4ab1d4f6ca944e4d8172dbe39",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading qwen_tokenizer.png', max=28742.0, style=Progr…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1655830b9f12473393c6b6bba03d3500",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading openai_api.gif', max=1201656.0, style=Progres…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "062610d467f44a72a1bf18d96fc2f03a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading performance.png', max=118491.0, style=Progres…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "12c62d02646c479d9d0db13b2c0cbd0f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading react_showcase_001.png', max=309444.0, style=…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "63b87263ed39410eb87fbec0e5e08930",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading react_showcase_002.png', max=629511.0, style=…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e693657eabcb4f12b578b97dffba85b8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)assets/tokenizer.pdf', max=24682.0, style=…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b2475e97795945f4a75f68779e5da047",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading react_tutorial_002.png', max=455206.0, style=…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "208d64fea5ac416797426a595f1ce09e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading react_tutorial_001.png', max=384709.0, style=…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9723393499af450e9141487022e1233b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading tokenizer.png', max=142281.0, style=ProgressS…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8df823f7a92b48e78208d8f4512a5819",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)x_colorful_black.png', max=1326970.0, styl…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9d8fe71d446f4d2bb6fec93b40754ad4",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)477ef862/config.json', max=936.0, style=Pr…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c418d405270e47f7ad334c9559fa2b94",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)neration_config.json', max=221.0, style=Pr…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8646612d1db0429ab3003aeae8e6b67b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)onfiguration_qwen.py', max=2087.0, style=P…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6490d2fb73ad4e5b8f71f63594754cc5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading wechat.png', max=45699.0, style=ProgressStyle…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ed3cb188302b49af86f1987b8d0f36ab",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading web_demo.gif', max=18786391.0, style=Progress…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "67d4e048d32e48e29e41323685691f79",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)862/modeling_qwen.py', max=47080.0, style=…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7e783e9476ae452ea78a04235fba9d7b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)quantize_config.json', max=211.0, style=Pr…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b158a12e879d4bbf8fde9a8f4d3420cf",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)_generation_utils.py', max=14604.0, style=…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a48886788e8d4d5392f151e252f58710",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)bit-128g.safetensors', max=5860657576.0, s…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "418df9ddfa914f9a81d115ae3044ed99",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)7ef862/qwen.tiktoken', max=2561218.0, styl…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d9080a1d137945679293ebdc17b6a4bc",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)tokenization_qwen.py', max=8443.0, style=P…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1a2503ab5ba243b59b5e787fe170d259",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)862/requirements.txt', max=90.0, style=Pro…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "88842910b03e44cba8728938285bfaba",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, description='Downloading (…)okenizer_config.json', max=173.0, style=Pr…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862'"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "snapshot_download(repo_id=model_name, revision=commit_hash, cache_dir=local_model_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8d666c79-b039-4258-ac3b-46b19e63c3b8",
   "metadata": {},
   "source": [
    "### 2. 把模型拷贝到S3为后续部署做准备"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "e9431deb-6359-442d-847b-1563f8dd3854",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import sagemaker\n",
    "from sagemaker import image_uris\n",
    "import boto3\n",
    "import os\n",
    "import time\n",
    "import json\n",
    "\n",
    "role = sagemaker.get_execution_role()  # execution role for the endpoint\n",
    "sess = sagemaker.session.Session()  # sagemaker session for interacting with different AWS APIs\n",
    "bucket = sess.default_bucket()  # bucket to house artifacts\n",
    "\n",
    "region = sess._region_name\n",
    "account_id = sess.account_id()\n",
    "\n",
    "s3_client = boto3.client(\"s3\")\n",
    "sm_client = boto3.client(\"sagemaker\")\n",
    "smr_client = boto3.client(\"sagemaker-runtime\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "40dd8f16-ae7c-48bf-8e52-1a15425fa74d",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "s3_code_prefix: LLM-RAG/workshop/LLM_qwen_int4_stream_deploy_code\n",
      "model_snapshot_path: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862\n"
     ]
    }
   ],
   "source": [
    "s3_model_prefix = \"LLM-RAG/workshop/LLM_qwen_int4_stream_model\"  # folder where model checkpoint will go\n",
    "model_snapshot_path = list(local_model_path.glob(\"**/snapshots/*\"))[0]\n",
    "s3_code_prefix = \"LLM-RAG/workshop/LLM_qwen_int4_stream_deploy_code\"\n",
    "print(f\"s3_code_prefix: {s3_code_prefix}\")\n",
    "print(f\"model_snapshot_path: {model_snapshot_path}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "067292c9-c066-4649-a61f-b460a24da584",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/.gitattributes to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/.gitattributes\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/README.md to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/README.md\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/LICENSE to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/LICENSE\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/NOTICE to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/NOTICE\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/assets/performance.png to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/assets/performance.png\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/assets/qwen_tokenizer.png to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/assets/qwen_tokenizer.png\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/assets/logo.jpg to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/assets/logo.jpg\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/assets/react_showcase_001.png to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/assets/react_showcase_001.png\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/config.json to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/config.json\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/assets/wechat.png to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/assets/wechat.png\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/assets/react_showcase_002.png to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/assets/react_showcase_002.png\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/assets/hfagent_chat_2.png to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/assets/hfagent_chat_2.png\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/assets/react_tutorial_001.png to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/assets/react_tutorial_001.png\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/assets/hfagent_run.png to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/assets/hfagent_run.png\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/assets/cli_demo.gif to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/assets/cli_demo.gif\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/configuration_qwen.py to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/configuration_qwen.py\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/assets/react_tutorial_002.png to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/assets/react_tutorial_002.png\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/assets/tokenizer.pdf to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/assets/tokenizer.pdf\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/assets/openai_api.gif to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/assets/openai_api.gif\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/assets/hfagent_chat_1.png to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/assets/hfagent_chat_1.png\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/quantize_config.json to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/quantize_config.json\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/assets/wanx_colorful_black.png to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/assets/wanx_colorful_black.png\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/assets/tokenizer.png to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/assets/tokenizer.png\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/qwen.tiktoken to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/qwen.tiktoken\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/modeling_qwen.py to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/modeling_qwen.py\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/qwen_generation_utils.py to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/qwen_generation_utils.py\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/tokenization_qwen.py to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/tokenization_qwen.py\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/tokenizer_config.json to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/tokenizer_config.json\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/requirements.txt to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/requirements.txt\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/generation_config.json to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/generation_config.json\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/assets/web_demo.gif to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/assets/web_demo.gif\n",
      "upload: LLM_qwen_int4_model/models--Qwen--Qwen-7B-Chat-Int4/snapshots/955bbfa8c8f0c592b7713f1186c2c9ab477ef862/gptq_model-4bit-128g.safetensors to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/gptq_model-4bit-128g.safetensors\n"
     ]
    }
   ],
   "source": [
    "!aws s3 cp --recursive {model_snapshot_path} s3://{bucket}/{s3_model_prefix}"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "696b70c3-90f1-4175-95bf-568bafbcd383",
   "metadata": {},
   "source": [
    "### 3. 模型部署准备（entrypoint脚本，容器镜像，服务配置）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "6f7c4277-4480-42c6-aee6-1fbcca94eb82",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Image going to be used is ---- > 763104351884.dkr.ecr.us-east-2.amazonaws.com/djl-inference:0.23.0-deepspeed0.9.5-cu118\n"
     ]
    }
   ],
   "source": [
    "#中国区需要替换为下面的image_uri\n",
    "# inference_image_uri = (\n",
    "#     f\"727897471807.dkr.ecr.{region}.amazonaws.com.cn/djl-inference:0.23.0-deepspeed0.9.5-cu118\"\n",
    "# )\n",
    "\n",
    "\n",
    "inference_image_uri = image_uris.retrieve(\n",
    "    framework=\"djl-deepspeed\",\n",
    "    region=sess.boto_session.region_name,\n",
    "    version=\"0.23.0\"\n",
    ")\n",
    "print(f\"Image going to be used is ---- > {inference_image_uri}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "8d771bdb-11d2-45d2-9bef-face29221838",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!mkdir -p LLM_qwen_int4_stream_deploy_code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "id": "e5348ecb-43df-4094-97d8-a6723004862a",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Overwriting LLM_qwen_int4_stream_deploy_code/model.py\n"
     ]
    }
   ],
   "source": [
    "%%writefile LLM_qwen_int4_stream_deploy_code/model.py\n",
    "from djl_python import Input, Output\n",
    "import torch\n",
    "import logging\n",
    "import math\n",
    "import os\n",
    "\n",
    "from transformers import AutoTokenizer\n",
    "from transformers.generation import GenerationConfig\n",
    "from auto_gptq import AutoGPTQForCausalLM\n",
    "\n",
    "\n",
    "STOP_flag = \"[DONE]\"\n",
    "\n",
    "\n",
    "def load_model(properties):\n",
    "    \"\"\"Load the GPTQ-quantized model, its tokenizer and its generation config.\n",
    "\n",
    "    Args:\n",
    "        properties: serving options mapping. \"tensor_parallel_degree\" and\n",
    "            \"model_dir\" are always read; \"model_id\", when present, replaces\n",
    "            \"model_dir\" as the location everything is loaded from.\n",
    "\n",
    "    Returns:\n",
    "        Tuple ``(model, tokenizer, config)``; ``.eval()`` has been called on the model.\n",
    "    \"\"\"\n",
    "    # Read (and therefore required) but not used further in this function.\n",
    "    tensor_parallel = properties[\"tensor_parallel_degree\"]\n",
    "    # Evaluating the fallback first keeps \"model_dir\" mandatory, as before.\n",
    "    location = properties.get(\"model_id\", properties[\"model_dir\"])\n",
    "    logging.info(f\"Loading model in {location}\")\n",
    "\n",
    "    tokenizer = AutoTokenizer.from_pretrained(location, trust_remote_code=True)\n",
    "    model = AutoGPTQForCausalLM.from_quantized(\n",
    "        location,\n",
    "        device_map=\"auto\",\n",
    "        trust_remote_code=True,\n",
    "        use_safetensors=True,\n",
    "    ).eval()\n",
    "    # Generation hyper-parameters (max length, top_p, ...) can be customised on this config.\n",
    "    config = GenerationConfig.from_pretrained(location, trust_remote_code=True)\n",
    "    return model, tokenizer, config\n",
    "\n",
    "\n",
    "model = None\n",
    "tokenizer = None\n",
    "generator = None\n",
    "config = None\n",
    "\n",
    "def stream_items(prompt, history, max_length, top_p, temperature):\n",
    "    global model, tokenizer, config\n",
    "    size = 0\n",
    "    response = \"\"\n",
    "    config.max_new_tokens = max_length\n",
    "    config.top_p = top_p\n",
    "    \n",
    "    ##传入temperature会报错\n",
    "    ##model.generation_config.temperature = temperature \n",
    "    res_generator = model.chat_stream(tokenizer, prompt, history=history,generation_config=config)\n",
    "    for response in res_generator:\n",
    "        this_response = response[size:]\n",
    "        size = len(response)\n",
    "        stream_buffer = { \"outputs\":this_response,\"finished\": False}\n",
    "        yield stream_buffer\n",
    "    ## stop\n",
    "    # yield {\"query\": prompt, \"outputs\": STOP_flag, \"response\": response, \"history\": [], \"finished\": True}\n",
    "\n",
    "\n",
    "def handle(inputs: Input):\n",
    "    \"\"\"Request handler: lazily load the model, then answer one request.\n",
    "\n",
    "    The JSON body must contain \"inputs\" (the prompt). Optional fields:\n",
    "    \"parameters\" (``max_length``, ``top_p``, ``temperature``), \"history\"\n",
    "    and \"stream\". With a truthy \"stream\" the reply is streamed as json\n",
    "    lines; otherwise a single JSON object with \"outputs\" and \"history\" is\n",
    "    returned.\n",
    "\n",
    "    Returns:\n",
    "        An ``Output``, or ``None`` when the request is empty.\n",
    "    \"\"\"\n",
    "    global model, tokenizer, config\n",
    "    if not model:\n",
    "        model, tokenizer, config = load_model(inputs.get_properties())\n",
    "\n",
    "    if inputs.is_empty():\n",
    "        return None\n",
    "\n",
    "    data = inputs.get_as_json()\n",
    "    input_sentences = data[\"inputs\"]\n",
    "    # \"parameters\" is optional; missing or null falls back to the defaults below.\n",
    "    params = data.get(\"parameters\") or {}\n",
    "    max_length = params.get(\"max_length\", 1024)\n",
    "    top_p = params.get(\"top_p\", 1)\n",
    "    history = data.get(\"history\", [])\n",
    "    print(f'input prompt:{input_sentences}')\n",
    "\n",
    "    outputs = Output()\n",
    "    if data.get(\"stream\"):\n",
    "        outputs.add_property(\"content-type\", \"application/jsonlines\")\n",
    "        # Pass values explicitly so unknown or missing keys in \"parameters\"\n",
    "        # cannot break the call.\n",
    "        outputs.add_stream_content(\n",
    "            stream_items(input_sentences, history, max_length, top_p, params.get(\"temperature\"))\n",
    "        )\n",
    "    else:\n",
    "        config.max_new_tokens = max_length\n",
    "        config.top_p = top_p\n",
    "        response, history = model.chat(tokenizer, input_sentences, history=history, generation_config=config)\n",
    "        outputs.add_as_json({\"outputs\": response, \"history\": history})\n",
    "\n",
    "    return outputs"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a06d1e60-3914-4059-a08f-05ac26761165",
   "metadata": {},
   "source": [
    "#### Note: option.s3url 需要按照自己的账号进行修改"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "id": "8996fe44-8e70-468b-abc1-38187cb33f4f",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Overwriting LLM_qwen_int4_stream_deploy_code/serving.properties\n"
     ]
    }
   ],
   "source": [
    "%%writefile LLM_qwen_int4_stream_deploy_code/serving.properties\n",
    "engine=Python\n",
    "option.tensor_parallel_degree=1\n",
    "option.enable_streaming=True\n",
    "option.predict_timeout=240\n",
    "option.s3url = s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_model/"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "feef22a2-27b9-4018-a46b-6a99b532512f",
   "metadata": {},
   "source": [
    "#### 注意: 必须把transformers升级到4.27.1以上，否则会出现 [Issue344](https://github.com/THUDM/ChatGLM-6B/issues/344)（本示例固定使用 transformers==4.31.0）\n",
    "\n",
    "如果是中国区，建议在 requirements.txt 的首行添加国内的 pip 镜像，如下代码所示（其余依赖与下一个单元格保持一致）\n",
    "```\n",
    "%%writefile LLM_qwen_int4_stream_deploy_code/requirements.txt\n",
    "-i https://pypi.tuna.tsinghua.edu.cn/simple\n",
    "transformers==4.31.0\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "id": "7b7e76c6-6dbc-47fc-9f47-4765c526ab76",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Overwriting LLM_qwen_int4_stream_deploy_code/requirements.txt\n"
     ]
    }
   ],
   "source": [
    "%%writefile LLM_qwen_int4_stream_deploy_code/requirements.txt\n",
    "transformers==4.31.0\n",
    "accelerate\n",
    "tiktoken\n",
    "einops\n",
    "transformers_stream_generator==0.0.4\n",
    "scipy\n",
    "gekko\n",
    "auto-gptq"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "id": "199907e8-dde4-43b5-a6f3-82f46a6bf6f3",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# !pip install auto-gptq"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "id": "0ae6734a-aacd-410d-818d-0a962697c3c4",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LLM_qwen_int4_stream_deploy_code/\n",
      "LLM_qwen_int4_stream_deploy_code/model.py\n",
      "LLM_qwen_int4_stream_deploy_code/requirements.txt\n",
      "LLM_qwen_int4_stream_deploy_code/serving.properties\n"
     ]
    }
   ],
   "source": [
    "# -f keeps the first run from erroring when no previous archive exists\n",
    "!rm -f model.tar.gz\n",
    "!cd LLM_qwen_int4_stream_deploy_code && rm -rf \".ipynb_checkpoints\"\n",
    "!tar czvf model.tar.gz LLM_qwen_int4_stream_deploy_code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "id": "0f77dc76-6d8c-4665-ba88-f03e887c136c",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "S3 Code or Model tar ball uploaded to --- > s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/LLM_qwen_int4_stream_deploy_code/model.tar.gz\n"
     ]
    }
   ],
   "source": [
    "s3_code_artifact = sess.upload_data(\"model.tar.gz\", bucket, s3_code_prefix)\n",
    "print(f\"S3 Code or Model tar ball uploaded to --- > {s3_code_artifact}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a5853daa-b8a3-4485-8c0a-64bf83e93a18",
   "metadata": {},
   "source": [
    "### 4. 创建模型 & 创建endpoint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "id": "ef974ca1-9638-45a8-9145-ea9d03b2b072",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "qwen-stream-int4-2023-08-31-02-25-35-582\n",
      "Image going to be used is ---- > 763104351884.dkr.ecr.us-east-2.amazonaws.com/djl-inference:0.23.0-deepspeed0.9.5-cu118\n",
      "Created Model: arn:aws:sagemaker:us-east-2:946277762357:model/qwen-stream-int4-2023-08-31-02-25-35-582\n"
     ]
    }
   ],
   "source": [
    "from sagemaker.utils import name_from_base\n",
    "import boto3\n",
    "\n",
    "model_name = name_from_base(f\"qwen-stream-int4\") #Note: Need to specify model_name\n",
    "print(model_name)\n",
    "print(f\"Image going to be used is ---- > {inference_image_uri}\")\n",
    "\n",
    "create_model_response = sm_client.create_model(\n",
    "    ModelName=model_name,\n",
    "    ExecutionRoleArn=role,\n",
    "    PrimaryContainer={\n",
    "        \"Image\": inference_image_uri,\n",
    "        \"ModelDataUrl\": s3_code_artifact\n",
    "    },\n",
    "    \n",
    ")\n",
    "model_arn = create_model_response[\"ModelArn\"]\n",
    "\n",
    "print(f\"Created Model: {model_arn}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "id": "233bb3a4-d737-41ad-8fcc-7082c6278e8c",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'EndpointConfigArn': 'arn:aws:sagemaker:us-east-2:946277762357:endpoint-config/qwen-stream-int4-2023-08-31-02-25-35-582-config',\n",
       " 'ResponseMetadata': {'RequestId': '03bddef3-a6dc-4271-9625-67fba840982a',\n",
       "  'HTTPStatusCode': 200,\n",
       "  'HTTPHeaders': {'x-amzn-requestid': '03bddef3-a6dc-4271-9625-67fba840982a',\n",
       "   'content-type': 'application/x-amz-json-1.1',\n",
       "   'content-length': '128',\n",
       "   'date': 'Thu, 31 Aug 2023 02:25:38 GMT'},\n",
       "  'RetryAttempts': 0}}"
      ]
     },
     "execution_count": 121,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "endpoint_config_name = f\"{model_name}-config\"\n",
    "endpoint_name = f\"{model_name}-endpoint\"\n",
    "\n",
    "#Note: ml.g4dn.2xlarge 也可以选择\n",
    "endpoint_config_response = sm_client.create_endpoint_config(\n",
    "    EndpointConfigName=endpoint_config_name,\n",
    "    ProductionVariants=[\n",
    "        {\n",
    "            \"VariantName\": \"variant1\",\n",
    "            \"ModelName\": model_name,\n",
    "            \"InstanceType\": \"ml.g5.2xlarge\",\n",
    "            \"InitialInstanceCount\": 1,\n",
    "            # \"VolumeSizeInGB\" : 400,\n",
    "            # \"ModelDataDownloadTimeoutInSeconds\": 2400,\n",
    "            \"ContainerStartupHealthCheckTimeoutInSeconds\": 10*60,\n",
    "        },\n",
    "    ],\n",
    ")\n",
    "endpoint_config_response"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "id": "734a39b0-473e-4421-94c8-74d2b4105038",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Created Endpoint: arn:aws:sagemaker:us-east-2:946277762357:endpoint/qwen-stream-int4-2023-08-31-02-25-35-582-endpoint\n"
     ]
    }
   ],
   "source": [
    "create_endpoint_response = sm_client.create_endpoint(\n",
    "    EndpointName=f\"{endpoint_name}\", EndpointConfigName=endpoint_config_name\n",
    ")\n",
    "print(f\"Created Endpoint: {create_endpoint_response['EndpointArn']}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1262e826-a810-401d-a5a9-f62febb24e5f",
   "metadata": {},
   "source": [
    "#### 持续检测模型部署进度"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "id": "08969928-6b9e-4d9c-a033-a31f5f77bdfb",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Status: Creating\n",
      "Status: Creating\n",
      "Status: Creating\n",
      "Status: Creating\n",
      "Status: Creating\n",
      "Status: Creating\n",
      "Status: Creating\n",
      "Status: Creating\n",
      "Status: Creating\n",
      "Status: InService\n",
      "Arn: arn:aws:sagemaker:us-east-2:946277762357:endpoint/qwen-stream-int4-2023-08-31-02-25-35-582-endpoint\n",
      "Status: InService\n"
     ]
    }
   ],
   "source": [
    "import time\n",
    "\n",
    "# Poll once a minute; any status other than \"Creating\" ends the wait.\n",
    "while True:\n",
    "    resp = sm_client.describe_endpoint(EndpointName=endpoint_name)\n",
    "    status = resp[\"EndpointStatus\"]\n",
    "    print(\"Status: \" + status)\n",
    "    if status != \"Creating\":\n",
    "        break\n",
    "    time.sleep(60)\n",
    "\n",
    "print(\"Arn: \" + resp[\"EndpointArn\"])\n",
    "print(\"Status: \" + status)\n",
    "\n",
    "# Stop the notebook here instead of invoking an endpoint that never came up.\n",
    "if status != \"InService\":\n",
    "    reason = resp.get(\"FailureReason\", \"no failure reason reported\")\n",
    "    raise RuntimeError(f\"Endpoint {endpoint_name} ended in status {status}: {reason}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d985b427-3959-46f7-9a50-5a2b45e2d513",
   "metadata": {},
   "source": [
    "### 5. 模型测试"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "e56bfdaa-3469-4784-aa8a-e32177cde3f2",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 288 ms, sys: 45.8 ms, total: 334 ms\n",
      "Wall time: 419 ms\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/conda/lib/python3.7/site-packages/boto3/compat.py:82: PythonDeprecationWarning: Boto3 will no longer support Python 3.7 starting December 13, 2023. To continue receiving service updates, bug fixes, and security updates please upgrade to Python 3.8 or later. More information can be found here: https://aws.amazon.com/blogs/developer/python-support-policy-updates-for-aws-sdks-and-tools/\n",
      "  warnings.warn(warning, PythonDeprecationWarning)\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "import json\n",
    "import boto3\n",
    "\n",
    "smr_client = boto3.client(\"sagemaker-runtime\")\n",
    "\n",
    "parameters = {\n",
    "  \"max_length\": 1024,\n",
    "  \"temperature\": 0.1,\n",
    "  \"top_p\":0.8\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "ae5983aa-0c91-4c78-a63f-7192a39a8cfb",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import io\n",
    "\n",
    "\n",
    "class StreamScanner:\n",
    "    \"\"\"\n",
    "    A helper class for parsing the InvokeEndpointWithResponseStream event stream. \n",
    "    \n",
    "    The output of the model will be in the following format:\n",
    "    ```\n",
    "    b'{\"outputs\": [\" a\"]}\\n'\n",
    "    b'{\"outputs\": [\" challenging\"]}\\n'\n",
    "    b'{\"outputs\": [\" problem\"]}\\n'\n",
    "    ...\n",
    "    ```\n",
    "    \n",
    "    While usually each PayloadPart event from the event stream will contain a byte array \n",
    "    with a full json, this is not guaranteed and some of the json objects may be split across\n",
    "    PayloadPart events. For example:\n",
    "    ```\n",
    "    {'PayloadPart': {'Bytes': b'{\"outputs\": '}}\n",
    "    {'PayloadPart': {'Bytes': b'[\" problem\"]}\\n'}}\n",
    "    ```\n",
    "    \n",
    "    This class accounts for this by concatenating bytes written via the 'write' function\n",
    "    and then exposing a method which will return lines (ending with a '\\n' character) within\n",
    "    the buffer via the 'readlines' function. It maintains the position of the last read \n",
    "    position to ensure that previous bytes are not exposed again. \n",
    "    \"\"\"\n",
    "    \n",
    "    def __init__(self):\n",
    "        self.buff = io.BytesIO()\n",
    "        self.read_pos = 0\n",
    "        \n",
    "    def write(self, content):\n",
    "        self.buff.seek(0, io.SEEK_END)\n",
    "        self.buff.write(content)\n",
    "        \n",
    "    def readlines(self):\n",
    "        self.buff.seek(self.read_pos)\n",
    "        for line in self.buff.readlines():\n",
    "            if line[-1] != b'\\n':\n",
    "                self.read_pos += len(line)\n",
    "                yield line[:-1]\n",
    "                \n",
    "    def reset(self):\n",
    "        self.read_pos = 0"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ad367ddf-96b6-40e1-938e-2a9aa0f03b0c",
   "metadata": {},
   "source": [
    "## Stream"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "84f9219f-fa4d-413e-b02d-2047142b4a79",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "故事发生在遥远的未来，地球已被毁灭，人类为了生存，不得不开始寻找新的家园。在宇宙中，他们发现了无数的星球，但是每个星球都有不同的危险。于是，人类开始建立联盟，共同抵抗宇宙中的敌人。\n",
      "\n",
      "联盟中最强大的一支军队，由人类最聪明的人组成，他们被称为“智慧军”。智慧军的任务是保护联盟的星球，同时也探索宇宙中的秘密。他们乘坐着最先进的飞船，穿梭在宇宙中，寻找敌人的踪迹。\n",
      "\n",
      "有一天，智慧军接到了一个紧急任务。他们接到了来自遥远星球的求救信号。这个星球被一种强大的外星种族占领，他们威胁要毁灭整个星球。智慧军立即出发，前往这个星球。\n",
      "\n",
      "当他们到达星球时，他们发现整个星球已经被外星种族的基地所包围。智慧军需要想出一个聪明的计划来击败外星种族。\n",
      "\n",
      "经过一番讨论，智慧军制定出了一个大胆的计划。他们利用星球上的自然元素，制造出了无数的机器人和无人机，这些机器人和无人机可以攻击外星种族的基地。然后，智慧军派遣了一些精英士兵潜入外星种族的基地，破坏了他们的电脑系统。\n",
      "\n",
      "外星种族的基地受到了攻击，他们非常惊慌失措。但是他们很快恢复了秩序，并开始反击。智慧军的机器人和无人机与外星种族的机器人和战机展开了激战。经过一番苦战，智慧军最终获得了胜利。\n",
      "\n",
      "智慧军拯救了这个星球，并获得了外星种族的尊重。他们与外星种族建立了友好的关系，并开始共同探索宇宙中的秘密。\n",
      "time:11.242651462554932 s\n"
     ]
    }
   ],
   "source": [
    "# prompts1 = \"\"\"你\"\"\"\n",
    "import time\n",
    "\n",
    "start = time.time()\n",
    "prompts1 = \"\"\"写一篇500字的科幻小说，背景关于宇宙战争\"\"\"\n",
    "# Alternative prompt (question answering over a quoted FAQ entry):\n",
    "# prompts1 = \"\"\"AWS Clean Rooms 的FAQ文档有提到 Q: 是否发起者和数据贡献者都会被收费？A: 是单方收费，只有查询的接收方会收费。\n",
    "# 请问AWS Clean Rooms是多方都会收费吗？\n",
    "# \"\"\"\n",
    "\n",
    "# Request a streamed reply; the handler switches on the \"stream\" flag.\n",
    "response_model = smr_client.invoke_endpoint_with_response_stream(\n",
    "    EndpointName=endpoint_name,\n",
    "    Body=json.dumps({\"inputs\": prompts1, \"parameters\": parameters, \"history\": [], \"stream\": True}),\n",
    "    ContentType=\"application/json\",\n",
    ")\n",
    "\n",
    "event_stream = response_model['Body']\n",
    "scanner = StreamScanner()\n",
    "for event in event_stream:\n",
    "    # Feed raw bytes to the scanner, then print every record it hands back.\n",
    "    scanner.write(event['PayloadPart']['Bytes'])\n",
    "    for line in scanner.readlines():\n",
    "        try:\n",
    "            resp = json.loads(line)\n",
    "            print(resp.get(\"outputs\")['outputs'], end='')\n",
    "        except Exception as e:\n",
    "            # Record did not have the expected shape: show the raw bytes and keep going.\n",
    "            print(line)\n",
    "print (f\"\\ntime:{time.time()-start} s\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "619db19b-0072-4d25-a4df-5d59c2f6947b",
   "metadata": {},
   "source": [
    "## Non-stream"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "d577e076-52b2-4257-a447-1d3a5813d7ce",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "time:12.590379476547241 s\n",
      "在遥远的宇宙中，有两个强大的文明，分别是赛拉克斯文明和阿瑞斯文明。这两个文明之间的矛盾已经持续了很长时间，双方之间的战争也从未停歇。\n",
      "\n",
      "赛拉克斯文明是一个科技高度发达的文明，他们拥有一种叫做“能源炮”的武器，可以发射出强大的能量束，瞬间摧毁敌人的防御系统。而阿瑞斯文明则更注重实战经验，他们的战士们有着强大的体能和战斗技巧，可以与赛拉克斯文明的高科技武器相抗衡。\n",
      "\n",
      "在一场决定宇宙命运的战争中，赛拉克斯文明的能源炮被阿瑞斯文明的战士们成功摧毁，让赛拉克斯文明的军队陷入了困境。但赛拉克斯文明并没有放弃，他们开始研发一种新型的能源武器，这种武器可以比能源炮更加先进和强大。\n",
      "\n",
      "经过几个月的艰苦研发，赛拉克斯文明终于成功地制造出了新型的能源武器。这种武器不仅可以发射出更强大的能量束，还可以进行智能化攻击，可以根据敌人的情况进行变化和调整。\n",
      "\n",
      "在另一场决定宇宙命运的战争中，赛拉克斯文明的新型能源武器终于派上了用场。在激烈的战斗中，赛拉克斯文明的军队成功地使用新型能源武器摧毁了阿瑞斯文明的防御系统，并将阿瑞斯文明的军队彻底击败。\n",
      "\n",
      "随着赛拉克斯文明的胜利，宇宙的和平也终于来临。赛拉克斯文明的科学家们也开始致力于探索宇宙的奥秘，希望可以找到更多的先进技术和知识，让宇宙更加美好。而阿瑞斯文明的战士们则开始重新学习战斗技巧，希望可以在未来的战争中重新崛起。\n",
      "\n",
      "在宇宙的漫长岁月中，战争和和平一直在循环。但只有那些不断探索和学习的文明，才能在宇宙中生存和繁衍下去。\n"
     ]
    }
   ],
   "source": [
    "\n",
    "# By default the endpoint created earlier in this notebook is used.\n",
    "# To query an endpoint deployed in a previous session (e.g. from a fresh kernel),\n",
    "# uncomment the next line and set its name explicitly.\n",
    "# endpoint_name = \"<your-endpoint-name>\"\n",
    "\n",
    "# Alternative prompt (question answering over a quoted FAQ entry):\n",
    "# prompts1 = \"\"\"AWS Clean Rooms 的FAQ文档有提到 Q: 是否发起者和数据贡献者都会被收费？A: 是单方收费，只有查询的接收方会收费。\n",
    "# 请问AWS Clean Rooms是多方都会收费吗？\n",
    "# \"\"\"\n",
    "prompts1 = \"\"\"写一篇500字的科幻小说，背景关于宇宙战争\"\"\"\n",
    "start = time.time()\n",
    "response_model = smr_client.invoke_endpoint(\n",
    "    EndpointName=endpoint_name,\n",
    "    Body=json.dumps({\"inputs\": prompts1, \"parameters\": parameters, \"history\": []}),\n",
    "    ContentType=\"application/json\",\n",
    ")\n",
    "\n",
    "resp = response_model['Body'].read()\n",
    "print (f\"\\ntime:{time.time()-start} s\")\n",
    "# print(resp.decode('utf8'))\n",
    "print(json.loads(resp)['outputs'])\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "21c8b703-e312-4964-8be9-a754468e07cd",
   "metadata": {},
   "source": [
    "#### 清除模型Endpoint和config"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "id": "f70d116f-4fb1-4f04-8732-3d6e4fb520de",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!aws sagemaker delete-endpoint --endpoint-name {endpoint_name}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "id": "184e4d1d-3d62-43df-9b17-5d64ece928bd",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!aws sagemaker delete-endpoint-config --endpoint-config-name {endpoint_config_name}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "id": "707e8f09",
   "metadata": {},
   "outputs": [],
   "source": [
    "!aws sagemaker delete-model --model-name {model_name}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b0fa1499-0cee-4a02-b828-f3de1b24e875",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "availableInstances": [
   {
    "_defaultOrder": 0,
    "_isFastLaunch": true,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 4,
    "name": "ml.t3.medium",
    "vcpuNum": 2
   },
   {
    "_defaultOrder": 1,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 8,
    "name": "ml.t3.large",
    "vcpuNum": 2
   },
   {
    "_defaultOrder": 2,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 16,
    "name": "ml.t3.xlarge",
    "vcpuNum": 4
   },
   {
    "_defaultOrder": 3,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 32,
    "name": "ml.t3.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 4,
    "_isFastLaunch": true,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 8,
    "name": "ml.m5.large",
    "vcpuNum": 2
   },
   {
    "_defaultOrder": 5,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 16,
    "name": "ml.m5.xlarge",
    "vcpuNum": 4
   },
   {
    "_defaultOrder": 6,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 32,
    "name": "ml.m5.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 7,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 64,
    "name": "ml.m5.4xlarge",
    "vcpuNum": 16
   },
   {
    "_defaultOrder": 8,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 128,
    "name": "ml.m5.8xlarge",
    "vcpuNum": 32
   },
   {
    "_defaultOrder": 9,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 192,
    "name": "ml.m5.12xlarge",
    "vcpuNum": 48
   },
   {
    "_defaultOrder": 10,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 256,
    "name": "ml.m5.16xlarge",
    "vcpuNum": 64
   },
   {
    "_defaultOrder": 11,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 384,
    "name": "ml.m5.24xlarge",
    "vcpuNum": 96
   },
   {
    "_defaultOrder": 12,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 8,
    "name": "ml.m5d.large",
    "vcpuNum": 2
   },
   {
    "_defaultOrder": 13,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 16,
    "name": "ml.m5d.xlarge",
    "vcpuNum": 4
   },
   {
    "_defaultOrder": 14,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 32,
    "name": "ml.m5d.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 15,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 64,
    "name": "ml.m5d.4xlarge",
    "vcpuNum": 16
   },
   {
    "_defaultOrder": 16,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 128,
    "name": "ml.m5d.8xlarge",
    "vcpuNum": 32
   },
   {
    "_defaultOrder": 17,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 192,
    "name": "ml.m5d.12xlarge",
    "vcpuNum": 48
   },
   {
    "_defaultOrder": 18,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 256,
    "name": "ml.m5d.16xlarge",
    "vcpuNum": 64
   },
   {
    "_defaultOrder": 19,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 384,
    "name": "ml.m5d.24xlarge",
    "vcpuNum": 96
   },
   {
    "_defaultOrder": 20,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": true,
    "memoryGiB": 0,
    "name": "ml.geospatial.interactive",
    "supportedImageNames": [
     "sagemaker-geospatial-v1-0"
    ],
    "vcpuNum": 0
   },
   {
    "_defaultOrder": 21,
    "_isFastLaunch": true,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 4,
    "name": "ml.c5.large",
    "vcpuNum": 2
   },
   {
    "_defaultOrder": 22,
    "_isFastLaunch": false,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 8,
    "name": "ml.c5.xlarge",
    "vcpuNum": 4
   },
   {
    "_defaultOrder": 23,
    "_isFastLaunch": false,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 16,
    "name": "ml.c5.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 24,
    "_isFastLaunch": false,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 32,
    "name": "ml.c5.4xlarge",
    "vcpuNum": 16
   },
   {
    "_defaultOrder": 25,
    "_isFastLaunch": false,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 72,
    "name": "ml.c5.9xlarge",
    "vcpuNum": 36
   },
   {
    "_defaultOrder": 26,
    "_isFastLaunch": false,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 96,
    "name": "ml.c5.12xlarge",
    "vcpuNum": 48
   },
   {
    "_defaultOrder": 27,
    "_isFastLaunch": false,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 144,
    "name": "ml.c5.18xlarge",
    "vcpuNum": 72
   },
   {
    "_defaultOrder": 28,
    "_isFastLaunch": false,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 192,
    "name": "ml.c5.24xlarge",
    "vcpuNum": 96
   },
   {
    "_defaultOrder": 29,
    "_isFastLaunch": true,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 16,
    "name": "ml.g4dn.xlarge",
    "vcpuNum": 4
   },
   {
    "_defaultOrder": 30,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 32,
    "name": "ml.g4dn.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 31,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 64,
    "name": "ml.g4dn.4xlarge",
    "vcpuNum": 16
   },
   {
    "_defaultOrder": 32,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 128,
    "name": "ml.g4dn.8xlarge",
    "vcpuNum": 32
   },
   {
    "_defaultOrder": 33,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 4,
    "hideHardwareSpecs": false,
    "memoryGiB": 192,
    "name": "ml.g4dn.12xlarge",
    "vcpuNum": 48
   },
   {
    "_defaultOrder": 34,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 256,
    "name": "ml.g4dn.16xlarge",
    "vcpuNum": 64
   },
   {
    "_defaultOrder": 35,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 61,
    "name": "ml.p3.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 36,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 4,
    "hideHardwareSpecs": false,
    "memoryGiB": 244,
    "name": "ml.p3.8xlarge",
    "vcpuNum": 32
   },
   {
    "_defaultOrder": 37,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 8,
    "hideHardwareSpecs": false,
    "memoryGiB": 488,
    "name": "ml.p3.16xlarge",
    "vcpuNum": 64
   },
   {
    "_defaultOrder": 38,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 8,
    "hideHardwareSpecs": false,
    "memoryGiB": 768,
    "name": "ml.p3dn.24xlarge",
    "vcpuNum": 96
   },
   {
    "_defaultOrder": 39,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 16,
    "name": "ml.r5.large",
    "vcpuNum": 2
   },
   {
    "_defaultOrder": 40,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 32,
    "name": "ml.r5.xlarge",
    "vcpuNum": 4
   },
   {
    "_defaultOrder": 41,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 64,
    "name": "ml.r5.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 42,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 128,
    "name": "ml.r5.4xlarge",
    "vcpuNum": 16
   },
   {
    "_defaultOrder": 43,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 256,
    "name": "ml.r5.8xlarge",
    "vcpuNum": 32
   },
   {
    "_defaultOrder": 44,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 384,
    "name": "ml.r5.12xlarge",
    "vcpuNum": 48
   },
   {
    "_defaultOrder": 45,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 512,
    "name": "ml.r5.16xlarge",
    "vcpuNum": 64
   },
   {
    "_defaultOrder": 46,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 768,
    "name": "ml.r5.24xlarge",
    "vcpuNum": 96
   },
   {
    "_defaultOrder": 47,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 16,
    "name": "ml.g5.xlarge",
    "vcpuNum": 4
   },
   {
    "_defaultOrder": 48,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 32,
    "name": "ml.g5.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 49,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 64,
    "name": "ml.g5.4xlarge",
    "vcpuNum": 16
   },
   {
    "_defaultOrder": 50,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 128,
    "name": "ml.g5.8xlarge",
    "vcpuNum": 32
   },
   {
    "_defaultOrder": 51,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 256,
    "name": "ml.g5.16xlarge",
    "vcpuNum": 64
   },
   {
    "_defaultOrder": 52,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 4,
    "hideHardwareSpecs": false,
    "memoryGiB": 192,
    "name": "ml.g5.12xlarge",
    "vcpuNum": 48
   },
   {
    "_defaultOrder": 53,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 4,
    "hideHardwareSpecs": false,
    "memoryGiB": 384,
    "name": "ml.g5.24xlarge",
    "vcpuNum": 96
   },
   {
    "_defaultOrder": 54,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 8,
    "hideHardwareSpecs": false,
    "memoryGiB": 768,
    "name": "ml.g5.48xlarge",
    "vcpuNum": 192
   },
   {
    "_defaultOrder": 55,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 8,
    "hideHardwareSpecs": false,
    "memoryGiB": 1152,
    "name": "ml.p4d.24xlarge",
    "vcpuNum": 96
   },
   {
    "_defaultOrder": 56,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 8,
    "hideHardwareSpecs": false,
    "memoryGiB": 1152,
    "name": "ml.p4de.24xlarge",
    "vcpuNum": 96
   }
  ],
  "instance_type": "ml.t3.medium",
  "kernelspec": {
   "display_name": "Python 3 (Data Science)",
   "language": "python",
   "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-2:429704687514:image/datascience-1.0"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
